library(tidyverse)
library(readxl)
library(patchwork)
library(kableExtra)
library(knitr)
library(ranger)
library(vip)




| Pitch     | Horizontal | Vertical | Pitch Proportion | Spin Rate |
|-----------|------------|----------|------------------|-----------|
| 4-Seamer  | 7.45       | 14.86    | 0.46             | 2285.29   |
| Changeup  | 14.03      | 32.27    | 0.26             | 1754.87   |
| Curveball | 9.45       | 53.35    | 0.26             | 2572.18   |
| Cutter    | 2.88       | 25.97    | 0.34             | 2380.57   |
| Sinker    | 15.00      | 22.89    | 0.39             | 2127.16   |
| Slider    | 6.42       | 36.28    | 0.34             | 2432.44   |
| Splitter  | 11.71      | 33.09    | 0.28             | 1459.77   |



Pitch Classifications for Stuff+
| Class | Stf+ Range |
|-------|------------|
| 1     | < 55       |
| 2     | < 70       |
| 3     | < 85       |
| 4     | < 95       |
| 5     | < 105      |
| 6     | < 115      |
| 7     | < 130      |
| 8     | < 145      |
| 9     | < 160      |
| 10    | ≥ 160      |











# Modelling columns pulled from Data2: identifiers, pitch-level Stuff metrics,
# movement/velocity/spin, usage, outcome statistics, and the fastball
# comparison columns (fb_type through speed_diff).
base <- Data2 %>%
  select(
    Name, year, pitch_hand:Stuff, `Stf+ Pitch`,
    pitcher_break_z, pitcher_break_x, avg_speed, spin_rate,
    pitches_thrown, pitch_per,
    run_value:hard_hit_percent, fb_type:speed_diff
  )

# Random-forest input table:
#   1. keep only rows with no missing value in the listed columns;
#   2. drop the columns from Name through pitch_type_name, fb_type, and every
#      outcome column other than woba, so that `woba ~ .` below treats all
#      remaining columns as predictors;
#   3. give the Stuff+ column a syntactic name.
tree_input <- base %>%
  drop_na(
    `Stf+ Pitch`, run_value, spin_rate,
    fb_stuff, fb_speed, fb_thrown, speed_diff
  ) %>%
  select(
    -(Name:pitch_type_name), -fb_type,
    -est_woba, -slg, -ba, -run_value_per_100, -run_value, -est_slg, -est_ba
  ) %>%
  rename(pitch_stuff = `Stf+ Pitch`)

# Regression forest of woba on every remaining column: 1000 trees, with
# impurity-based variable importance stored for the importance plot.
mlb_rf <- ranger(
  woba ~ .,
  data = tree_input,
  num.trees = 1000,
  importance = "impurity"
)

# Print the fitted-model summary.
mlb_rf
## Ranger result
##
## Call:
## ranger(woba ~ ., data = tree_input, num.trees = 1000, importance = "impurity")
##
## Type: Regression
## Number of trees: 1000
## Sample size: 2074
## Number of independent variables: 18
## Mtry: 4
## Target node size: 5
## Variable importance mode: impurity
## Splitrule: variance
## OOB prediction error (MSE): 0.002031904
## R squared (OOB): 0.5914404
# Variable-importance plot for the fitted forest, drawn as points on a
# black-and-white theme.
vip(mlb_rf, geom = "point") +
  theme_bw()



